rm(list=ls())
library(tidyverse)
library(readxl)

# Read and structure illiteracy rates for census 2000,2010

file_2000 <- list.files(here::here("data","raw","IBGE"), pattern = "tab1521_mun_literacy_2000", full.names = T)

literacy_2000 <- read_excel(file_2000,
                           sheet = "Tabela",
                           skip = 6,
                           col_names = c("mun_code","mun_name","var_desc","condition","literacy_perc")) %>% 
  filter(!str_detect(mun_code,"Fonte")) %>% 
  mutate(census_year = 2000,
         mun_code = substring(mun_code,1,6)) %>% 
  select(mun_code,mun_name,census_year,literacy_perc)

file_2010 <- list.files(here::here("data","raw","IBGE"), pattern = "tab1383_mun_literacy_2010", full.names = T)
  
literacy_2010 <- read_excel(file_2010,
                            sheet = "Tabela",
                            skip = 6,
                            col_names = c("mun_code","mun_name","literacy_perc")) %>% 
  filter(!str_detect(mun_code,"Fonte")) %>% 
  mutate(census_year = 2010,
         mun_code = substring(mun_code,1,6)) %>% 
  select(mun_code,mun_name,census_year,literacy_perc)

# Bind and create illiteracy indicator
literacy <- bind_rows(literacy_2000,literacy_2010)

illiteracy <- literacy %>% 
  mutate(illiteracy_perc = 100-literacy_perc) %>% 
  select(-literacy_perc)


# Read and structure higher ed rates for census 2000,2010

file_2000 <- list.files(here::here("data","raw","IBGE"), pattern = "tab2982_mun_higher_ed_2000", full.names = T)

higher_ed_2000 <- read_excel(file_2000,
                            sheet = "Tabela",
                            skip = 6,
                            col_names = c("mun_code","mun_name","age_group","bachelor_perc","master_phd_perc"),
                           na = "-") %>% 
  filter(!str_detect(mun_code,"Fonte")) %>% 
  mutate(census_year = 2000,
         mun_code = substring(mun_code,1,6)) %>% 
  select(-age_group,-master_phd_perc)

file_2010 <- list.files(here::here("data","raw","IBGE"), pattern = "tab3547_mun_higher_ed_2010", full.names = T)

higher_ed_2010 <- read_excel(file_2010,
                             sheet = "Tabela",
                             skip = 6,
                             col_names = c("mun_code","mun_name","educ_level","bachelor_perc")) %>% 
  filter(!str_detect(mun_code,"Fonte")) %>% 
  mutate(census_year = 2010,
         mun_code = substring(mun_code,1,6)) %>% 
  select(-educ_level)

# Bind
higher_ed <- bind_rows(higher_ed_2000,higher_ed_2010) %>% 
  select(-mun_name)

# Join illiteracy and higher_ed
educ_mun <- illiteracy %>% 
  full_join(higher_ed, by = c("mun_code","census_year")) 

# SaveRdS ----

write_rds(educ_mun, here::here("data","processed","citycharacteristics","educ_mun.rds"))
